*********************************************************************************
/*Create cost, cost-share, and revenue-per-cost figures by TPI Bin for Correspondence and In-Person, plus a yearly spending table*/
*********************************************************************************
* Reset the Stata session before loading anything
clear all

* NOTE: the globals oi_home, mvpf_audits, graphpath, min_yr and max_yr are read
* below but never defined in this file, so the caller must set them beforehand.
* The commented-out lines that follow are a disabled derivation of the
* directory globals.
// * Set directories 
// global dropbox "${user}/Opportunity Insights Dropbox"
// global oi_home "${dropbox}/Opportunity Insights Shared Workspace"
// global dropbox_local "${dropbox}/${dropbox_name}"
// global mvpf_audits "${dropbox_local}/MVPF-Tax-Audits" 

* Add the shared ado directory to the ado search path
adopath + "${oi_home}/Research files/ado"

* File paths
* datapath: folder holding the cleaned input data
* graphpath: copy of the caller-supplied global of the same name; every
* graph export in this file writes into it
local datapath "${mvpf_audits}/data/cleaned" 
local graphpath "${graphpath}" 

* Import data 
* The file name is built from the min_yr and max_yr globals
use "`datapath'/audit_tab_prim_inc_${min_yr}_${max_yr}.dta", clear


* Groups of variables
* Two space-separated lists of variable names (revenue and cost). The cost
* list appends the global step to some of the labor cost names.
* The definitions span several lines, so the command delimiter is switched
* to a semicolon while they are read and switched back afterwards.
* NOTE(review): in the code visible here these lists are only combined into
* the local vars below and are not read anywhere else - confirm they are
* still needed.
#delimit ;
local rev_vars = "rev_tot rev_appl rev_cnsl rev_exam
				  rev_coll_notice rev_coll"
				  ;
			
local cost_vars = "costs_coll costs_exam costs_appl costs_tot
				labor_costs_exam${step} labor_costs_appl${step} labor_costs_coll
				labor_costs_cnsl${step} oh_costs_exam oh_costs_appl oh_costs_coll
				costs_notices costs_acs
				costs_labor costs_labor_future costs_oh" 
				;
#delimit cr 

* Names of the exam types (not referenced again in the code visible here)
local exam_type = "operational correspondence"

* Variables of interest 
* Cost variables first, then revenue variables
local vars `cost_vars' `rev_vars'

* Version of fig to produce 
* The value comes from the caller-supplied global and is stored back unchanged.
global version  ${version} // paper or slides

* Other formatting
* Each branch sets the graph window font plus three globals:
*   title     - text spliced into the graph commands as the title option
*   img       - image format name
*   axis_size - placeholder for an xsize and ysize override (currently blank)
* Each global is now assigned exactly once per branch; the earlier duplicate
* assignments of title were redundant and have been removed.
* NOTE(review): the graph exports in this file hard-code wmf and never read
* the img global - confirm which one is intended.
* If version is neither slides nor paper, none of these globals is touched.
if "${version}" == "slides" {
	graph set window fontface "Arial"
	global img wmf
	* Compound quotes protect the inner quoted blank title text
	global title `"title(" ", size(vhuge))"'
	global axis_size "" // xsize(6.5) ysize(4) would make the figure wider
}
else if "${version}" == "paper" {
	graph set window fontface "LMRoman10-Regular"
	global img pdf
	* Left blank so that no title option is passed to the graphs
	global title " "
	global axis_size " " // blank: no size override
}

* Sample restriction: retain rows whose exam_type_pooled is at most 3
* (rows with a missing code are removed too). Per the original note this
* keeps the Operational and Correspondence exams; later sections use
* code 1 for operations and code 3 for correspondence.
drop if exam_type_pooled > 3

* Aggregate totals per row: the future cost and future revenue columns,
* each multiplied by obs2
generate aggregate_costs = costs_tot_future * obs2
generate aggregate_rev = rev_tot_future * obs2
***********************************************
* Total Expenditure by Exam Type Scatter Plot *
***********************************************
* Plots aggregate_costs against the income percentile bin, one marker series
* for exam type 3 and one for exam type 1, and exports the figure.

preserve

	* Figure name, axis titles and bin labels
	local fig_name = "scatter_total_costs_${inc}"
	local ylab = "Total Cost of Audits (Million $)"
	global xtitle = "TPI percentile group"
	label values ${inc}_pctl ${inc}_pctl_label

	* Marker and text preferences
	local marker_size small
	local textsize vsmall

	* y-axis range and tick labels (raw values relabelled as multiples of 1e6)
	local lb = 0
	local ub = 800000000
	local int = 200000000
	global ylab "ylabel(0 "0" 200000000 "200" 400000000 "400" 600000000 "600" 800000000 "800", nogrid format(%9.0fc))"

	* Range endpoints formatted with one decimal for the yscale option
	local y_min : display %02.1f `lb'
	local y_max : display %02.1f `ub'

	*** Subgroup figure
	* NOTE(review): C does not appear to be a standard marker symbol style;
	* Stata may fall back to its default marker - confirm the intended symbol.
	twoway ///
		(scatter aggregate_costs ${inc}_pctl ///
			if inrange(${inc}_pctl, 1, 22) & exam_type_pooled == 3, ///
			mcolor("0 114 178") msymbol(C) msize(`marker_size')) ///
		(scatter aggregate_costs ${inc}_pctl ///
			if inrange(${inc}_pctl, 1, 22) & exam_type_pooled == 1, ///
			mcolor(cranberry) msymbol(T) msize(`marker_size')), ///
		yscale(r(`y_min' `y_max')) ///
		ytitle("`ylab'") xtitle("${xtitle}") legend(off) ///
		${title} ${axis_size} ${ylab} ///
		xlabel(1(1)22, value labsize(small) tl(.5) nogrid angle(45))

	* NOTE(review): the format is fixed to wmf here even though the img global
	* is set to wmf or pdf earlier in the file - confirm which is intended.
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore

*************************************************
* Percent of Total Expenditure Stacked Bar Plot *
*************************************************
* Builds, for each tpi_pctl bin, the share of aggregate cost coming from
* exam type 1 and its complement, and draws them as a stacked bar chart.

*Create Percent Cost Graph
preserve

	* NOTE(review): the output name contains the spelling percemt; it is left
	* as is because other files may refer to the exported figure by name.
	local fig_name = "scatter_percemt_costs_${inc}"
	* Total aggregate cost per bin over all exam types still in memory
	collapse (sum) aggregate_costs, by(tpi_pctl)
	* Attach the bin totals back onto the row-level input file. The file is
	* re-read from disk, so the earlier exam type filter is not applied to it.
	merge 1:m tpi_pctl using "`datapath'/audit_tab_prim_inc_${min_yr}_${max_yr}.dta"

	* Keep only the exam type 1 rows (the numerator of the share below) and
	* drop bin 0
	keep if exam_type_pooled == 1
	drop if tpi_pctl == 0

	*Create new operational percent of total cost variable
	* The share is the type 1 aggregate cost divided by the bin total.
	* NOTE(review): the correspondence share is computed as the complement,
	* which assumes the bin total contains only these two exam types, while
	* the earlier filter keeps every code up to 3 - confirm.
	g percent_operational = (costs_tot_future * obs2) / aggregate_costs
	g percent_correspondence = 1 - percent_operational

	* y-axis ticks from 0 to 1 labelled as percentages
	global ylab "ylabel(0 "0%" 0.2 "20%" 0.4 "40%" 0.6 "60%" 0.8 "80%" 1 "100%", nogrid format(%9.0fc))"

	* set y-axis label, title, and figure name
	local ylab = "% of Total Audit Cost"
	lab val ${inc}_pctl ${inc}_pctl_label
			
	* xtitle
	* Set here but not passed to the graph bar command below
	global xtitle = "TPI percentile group"

	* Stacked bars per bin: operational share first, correspondence share second
	graph bar percent_operational percent_correspondence, over(tpi_pctl, label(angle(45) labsize(small))) legend(off) bar(1, color(cranberry)) bar(2, color("0 114 178")) ${ylab} ytitle("`ylab'") stack
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore


********************************************************************
* Total Expenditure by Exam Type Stacked Bar Plot - Pooling Top 1 %*
********************************************************************
* Stacked bars of aggregate cost per tpi_pctl bin, with every bin from 21
* upward pooled into bin 21.
preserve

	local fig_name = "scatter_stacked_costs_${inc}"

	* One cost column per exam type (missing on rows of the other types)
	generate operations_costs = aggregate_costs if exam_type_pooled == 1
	generate correspondence_costs = aggregate_costs if exam_type_pooled == 3

	* Reduce to one row per bin; max takes the largest value found in the bin
	collapse (sum) exam_type_pooled (max) operations_costs correspondence_costs, by(tpi_pctl)

	* Pool the top bins: sum over bins 21 and above, stored on the bin 21 row
	* NOTE(review): the condition also matches a missing tpi_pctl, if any exist
	egen pooled_oc = total(operations_costs) if tpi_pctl >= 21
	egen pooled_cc = total(correspondence_costs) if tpi_pctl >= 21
	replace operations_costs = pooled_oc if tpi_pctl == 21
	replace correspondence_costs = pooled_cc if tpi_pctl == 21

	* Bin 0 is not plotted and bin 22 is now part of bin 21
	drop if inlist(tpi_pctl, 0, 22)

	* Bin labels for the 21 remaining groups
	label define ${inc}_pctl_label_new ///
		1 "0-5" 2 "5-10" 3 "10-15" 4 "15-20" 5 "20-25" 6 "25-30" 7 "30-35" ///
		8 "35-40" 9 "40-45" 10 "45-50" 11 "50-55" 12 "55-60" 13 "60-65" 14 "65-70" ///
		15 "70-75" 16 "75-80" 17 "80-85" 18 "85-90" 19 "90-95" 20 "95-99" 21 "99+"
	label values ${inc}_pctl ${inc}_pctl_label_new

	* Axis text; tick labels show the raw values as multiples of 1e6
	local ylab = "Total Cost of Audits (Million $)"
	global xtitle = "TPI percentile group"
	local lb = 0
	local ub = 800000000
	local int = 200000000
	global ylab "ylabel(0 "0" 200000000 "200" 400000000 "400" 600000000 "600" 800000000 "800" 1000000000 "1000", nogrid format(%9.0fc))"

	* Operations bar first (cranberry), correspondence stacked on top (blue)
	graph bar operations_costs correspondence_costs, ///
		over(tpi_pctl, label(angle(45) labsize(small))) ///
		legend(off) bar(1, color(cranberry)) bar(2, color("0 114 178")) ///
		ytitle("`ylab'") ${title} ${axis_size} ${ylab} stack
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore



*********************************************************************
* Total Expenditure by Exam Type Stacked Bar Plot - Pooling Top 5 % *
*********************************************************************
* Stacked bars of aggregate cost per tpi_pctl bin, with every bin from 20
* upward pooled into bin 20.
preserve

	local fig_name = "scatter_stacked_costs_v2_${inc}"

	* One cost column per exam type (missing on rows of the other types)
	generate operations_costs = aggregate_costs if exam_type_pooled == 1
	generate correspondence_costs = aggregate_costs if exam_type_pooled == 3

	* Reduce to one row per bin; max takes the largest value found in the bin
	collapse (sum) exam_type_pooled (max) operations_costs correspondence_costs, by(tpi_pctl)

	* Pool the top 5 %: sum over bins 20 and above, stored on the bin 20 row
	* NOTE(review): the condition also matches a missing tpi_pctl, if any exist
	egen pooled_oc = total(operations_costs) if tpi_pctl >= 20
	egen pooled_cc = total(correspondence_costs) if tpi_pctl >= 20
	replace operations_costs = pooled_oc if tpi_pctl == 20
	replace correspondence_costs = pooled_cc if tpi_pctl == 20

	* Bin 0 is not plotted; bins 21 and 22 are now part of bin 20
	drop if inlist(tpi_pctl, 0, 21, 22)

	* Bin labels for the 20 remaining groups
	label define ${inc}_pctl_label_new ///
		1 "0-5" 2 "5-10" 3 "10-15" 4 "15-20" 5 "20-25" 6 "25-30" 7 "30-35" ///
		8 "35-40" 9 "40-45" 10 "45-50" 11 "50-55" 12 "55-60" 13 "60-65" 14 "65-70" ///
		15 "70-75" 16 "75-80" 17 "80-85" 18 "85-90" 19 "90-95" 20 "95+"
	label values ${inc}_pctl ${inc}_pctl_label_new

	* Axis text; tick labels show the raw values as multiples of 1e6
	local ylab = "Total Cost of Audits (Million $)"
	global xtitle = "TPI percentile group"
	local lb = 0
	local ub = 2000000000
	local int = 400000000
	global ylab "ylabel(0 "0" 400000000 "400" 800000000 "800" 1200000000 "1200" 1600000000 "1600" 2000000000 "2000", nogrid format(%9.0fc))"

	* Operations bar first (cranberry), correspondence stacked on top (blue)
	graph bar operations_costs correspondence_costs, ///
		over(tpi_pctl, label(angle(45) labsize(small))) ///
		legend(off) bar(1, color(cranberry)) bar(2, color("0 114 178")) ///
		ytitle("`ylab'") ${title} ${axis_size} ${ylab} stack
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore


********************************************************************
* Yearly Expenditure by Exam Type Stacked Bar Plot - Pooling Top 1 %*
********************************************************************
* Same layout as the pooled top 1 % cost chart, but each bar shows the
* aggregate cost divided by 5.
preserve

	local fig_name = "scatter_yearly_stacked_costs_${inc}"

	* One cost column per exam type (missing on rows of the other types)
	generate operations_costs = aggregate_costs if exam_type_pooled == 1
	generate correspondence_costs = aggregate_costs if exam_type_pooled == 3

	* Reduce to one row per bin; max takes the largest value found in the bin
	collapse (sum) exam_type_pooled (max) operations_costs correspondence_costs, by(tpi_pctl)

	* Pool the top 1 %: sum over bins 21 and above, stored on the bin 21 row
	* NOTE(review): the condition also matches a missing tpi_pctl, if any exist
	egen pooled_oc = total(operations_costs) if tpi_pctl >= 21
	egen pooled_cc = total(correspondence_costs) if tpi_pctl >= 21
	replace operations_costs = pooled_oc if tpi_pctl == 21
	replace correspondence_costs = pooled_cc if tpi_pctl == 21

	* Per-year figures
	* NOTE(review): 5 is presumably the number of years pooled in the input
	* file - confirm against the min_yr and max_yr globals
	generate operations_costs_yearly = operations_costs/5
	generate correspondence_costs_yearly = correspondence_costs/5

	* Bin 0 is not plotted and bin 22 is now part of bin 21
	drop if inlist(tpi_pctl, 0, 22)

	* Bin labels for the 21 remaining groups
	label define ${inc}_pctl_label_new ///
		1 "0-5" 2 "5-10" 3 "10-15" 4 "15-20" 5 "20-25" 6 "25-30" 7 "30-35" ///
		8 "35-40" 9 "40-45" 10 "45-50" 11 "50-55" 12 "55-60" 13 "60-65" 14 "65-70" ///
		15 "70-75" 16 "75-80" 17 "80-85" 18 "85-90" 19 "90-95" 20 "95-99" 21 "99+"
	label values ${inc}_pctl ${inc}_pctl_label_new

	* Axis text; tick labels show the raw values as multiples of 1e6
	local ylab = "Total Cost of Audits (Million $)"
	global xtitle = "TPI percentile group"
	local lb = 0
	local ub = 200000000
	local int = 40000000
	global ylab "ylabel(0 "0" 40000000 "40" 80000000 "80" 120000000 "120" 160000000 "160" 200000000 "200", nogrid format(%9.0fc))"

	* Operations bar first (cranberry), correspondence stacked on top (blue)
	graph bar operations_costs_yearly correspondence_costs_yearly, ///
		over(tpi_pctl, label(angle(45) labsize(small))) ///
		legend(off) bar(1, color(cranberry)) bar(2, color("0 114 178")) ///
		ytitle("`ylab'") ${title} ${axis_size} ${ylab} stack
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore



*********************************************************************
* Yearly Expenditure by Exam Type Stacked Bar Plot - Pooling Top 5 % *
*********************************************************************
* Same layout as the pooled top 5 % cost chart, but each bar shows the
* aggregate cost divided by 5.
preserve

	local fig_name = "scatter_yearly_stacked_costs_v2_${inc}"

	* One cost column per exam type (missing on rows of the other types)
	generate operations_costs = aggregate_costs if exam_type_pooled == 1
	generate correspondence_costs = aggregate_costs if exam_type_pooled == 3

	* Reduce to one row per bin; max takes the largest value found in the bin
	collapse (sum) exam_type_pooled (max) operations_costs correspondence_costs, by(tpi_pctl)

	* Pool the top 5 %: sum over bins 20 and above, stored on the bin 20 row
	* NOTE(review): the condition also matches a missing tpi_pctl, if any exist
	egen pooled_oc = total(operations_costs) if tpi_pctl >= 20
	egen pooled_cc = total(correspondence_costs) if tpi_pctl >= 20
	replace operations_costs = pooled_oc if tpi_pctl == 20
	replace correspondence_costs = pooled_cc if tpi_pctl == 20

	* Per-year figures
	* NOTE(review): 5 is presumably the number of years pooled in the input
	* file - confirm against the min_yr and max_yr globals
	generate operations_costs_yearly = operations_costs/5
	generate correspondence_costs_yearly = correspondence_costs/5

	* Bin 0 is not plotted; bins 21 and 22 are now part of bin 20
	drop if inlist(tpi_pctl, 0, 21, 22)

	* Bin labels for the 20 remaining groups
	label define ${inc}_pctl_label_new ///
		1 "0-5" 2 "5-10" 3 "10-15" 4 "15-20" 5 "20-25" 6 "25-30" 7 "30-35" ///
		8 "35-40" 9 "40-45" 10 "45-50" 11 "50-55" 12 "55-60" 13 "60-65" 14 "65-70" ///
		15 "70-75" 16 "75-80" 17 "80-85" 18 "85-90" 19 "90-95" 20 "95+"
	label values ${inc}_pctl ${inc}_pctl_label_new

	* Axis text; tick labels show the raw values as multiples of 1e6
	local ylab = "Total Cost of Audits (Million $)"
	global xtitle = "TPI percentile group"
	local lb = 0
	local ub = 400000000
	local int = 80000000
	global ylab "ylabel(0 "0" 80000000 "80" 160000000 "160" 240000000 "240" 320000000 "320" 400000000 "400", nogrid format(%9.0fc))"

	* Operations bar first (cranberry), correspondence stacked on top (blue)
	graph bar operations_costs_yearly correspondence_costs_yearly, ///
		over(tpi_pctl, label(angle(45) labsize(small))) ///
		legend(off) bar(1, color(cranberry)) bar(2, color("0 114 178")) ///
		ytitle("`ylab'") ${title} ${axis_size} ${ylab} stack
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore


*********************************************************************
* R/C by Exam Type Stacked Bar Plot - Pooling Top 5 % *
*********************************************************************
* Stacked bars of the revenue-to-cost ratio per tpi_pctl bin, with every bin
* from 20 upward pooled into bin 20.
preserve

	local fig_name = "stacked_rc_${inc}"

	* Ratio of aggregate revenue to aggregate cost, one column per exam type
	generate operations_rc = aggregate_rev/aggregate_costs if exam_type_pooled == 1
	generate correspondence_rc = aggregate_rev/aggregate_costs if exam_type_pooled == 3

	* Reduce to one row per bin; max takes the largest value found in the bin
	collapse (sum) exam_type_pooled (max) operations_rc correspondence_rc, by(tpi_pctl)

	* Pool the top 5 %: simple (unweighted) mean of the bin-level ratios over
	* bins 20 and above, stored on the bin 20 row
	* NOTE(review): the condition also matches a missing tpi_pctl, if any exist
	egen pooled_oc = mean(operations_rc) if tpi_pctl >= 20
	egen pooled_cc = mean(correspondence_rc) if tpi_pctl >= 20
	replace operations_rc = pooled_oc if tpi_pctl == 20
	replace correspondence_rc = pooled_cc if tpi_pctl == 20

	* Bin 0 is not plotted; bins 21 and 22 are now part of bin 20
	drop if inlist(tpi_pctl, 0, 21, 22)

	* Bin labels for the 20 remaining groups
	label define ${inc}_pctl_label_new ///
		1 "0-5" 2 "5-10" 3 "10-15" 4 "15-20" 5 "20-25" 6 "25-30" 7 "30-35" ///
		8 "35-40" 9 "40-45" 10 "45-50" 11 "50-55" 12 "55-60" 13 "60-65" 14 "65-70" ///
		15 "70-75" 16 "75-80" 17 "80-85" 18 "85-90" 19 "90-95" 20 "95+"
	label values ${inc}_pctl ${inc}_pctl_label_new

	* Axis text and ticks (0 to 10 in steps of 2)
	local ylab = "Revenue per Dollar of Audits"
	global xtitle = "TPI percentile group"
	local lb = 0
	local ub = 400000000
	local int = 80000000
	global ylab "ylabel(0 "0" 2 "2" 4 "4" 6 "6" 8 "8" 10 "10", nogrid format(%9.0fc))"

	* Operations bar first (cranberry), correspondence stacked on top (blue)
	graph bar operations_rc correspondence_rc, ///
		over(tpi_pctl, label(angle(45) labsize(small))) ///
		legend(off) bar(1, color(cranberry)) bar(2, color("0 114 178")) ///
		ytitle("`ylab'") ${title} ${axis_size} ${ylab} stack
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore



*********************************************************************
* R/C by Exam Type Stacked Bar Plot - Pooling Top 1 % *
*********************************************************************
* Stacked bars of the revenue-to-cost ratio per tpi_pctl bin, with every bin
* from 21 upward pooled into bin 21.
preserve

	local fig_name = "stacked_rc_v2_${inc}"

	* Ratio of aggregate revenue to aggregate cost, one column per exam type
	generate operations_rc = aggregate_rev/aggregate_costs if exam_type_pooled == 1
	generate correspondence_rc = aggregate_rev/aggregate_costs if exam_type_pooled == 3

	* Reduce to one row per bin; max takes the largest value found in the bin
	collapse (sum) exam_type_pooled (max) operations_rc correspondence_rc, by(tpi_pctl)

	* Pool the top 1 %: simple (unweighted) mean of the bin-level ratios over
	* bins 21 and above, stored on the bin 21 row
	* NOTE(review): the condition also matches a missing tpi_pctl, if any exist
	egen pooled_oc = mean(operations_rc) if tpi_pctl >= 21
	egen pooled_cc = mean(correspondence_rc) if tpi_pctl >= 21
	replace operations_rc = pooled_oc if tpi_pctl == 21
	replace correspondence_rc = pooled_cc if tpi_pctl == 21

	* Bin 0 is not plotted and bin 22 is now part of bin 21
	drop if inlist(tpi_pctl, 0, 22)

	* Bin labels for the 21 remaining groups
	label define ${inc}_pctl_label_new ///
		1 "0-5" 2 "5-10" 3 "10-15" 4 "15-20" 5 "20-25" 6 "25-30" 7 "30-35" ///
		8 "35-40" 9 "40-45" 10 "45-50" 11 "50-55" 12 "55-60" 13 "60-65" 14 "65-70" ///
		15 "70-75" 16 "75-80" 17 "80-85" 18 "85-90" 19 "90-95" 20 "95-99" 21 "99+"
	label values ${inc}_pctl ${inc}_pctl_label_new

	* Axis text and ticks (0 to 15 in steps of 5)
	local ylab = "Revenue per Dollar of Audits"
	global xtitle = "TPI percentile group"
	local lb = 0
	local ub = 400000000
	local int = 80000000
	global ylab "ylabel(0 "0" 5 "5" 10 "10" 15 "15", nogrid format(%9.0fc))"

	* Operations bar first (cranberry), correspondence stacked on top (blue)
	graph bar operations_rc correspondence_rc, ///
		over(tpi_pctl, label(angle(45) labsize(small))) ///
		legend(off) bar(1, color(cranberry)) bar(2, color("0 114 178")) ///
		ytitle("`ylab'") ${title} ${axis_size} ${ylab} stack
	graph export "`graphpath'/`fig_name'.wmf", as(wmf) replace

restore


*********************************************************************
*                      Table of Spending by Type                    *
*********************************************************************
* Sums four cost components by primary year and exam type and writes the
* result to an Excel workbook. The data in memory are preserved first and
* restored afterwards.

preserve

	* Load the yearly input file from the cleaned data folder
	local datapath "${mvpf_audits}/data/cleaned"
	use "`datapath'/audit_tab_prim_year.dta", clear

	* Sample: primary years 2010 to 2014 and exam_type_pooled below 4
	* (per the original note: Correspondence and In-Person only)
	keep if inrange(primary_yr, 2010, 2014) & exam_type_pooled < 4

	* Row-level totals: each cost component multiplied by obs2
	generate aggregate_collections = costs_coll * obs2
	generate aggregate_exams = costs_exam * obs2
	generate aggregate_appeals = costs_appl * obs2
	generate aggregate_future = costs_future * obs2

	* One row per year and exam type, summing every variable whose name
	* starts with aggregate
	collapse (sum) aggregate*, by(primary_yr exam_type_pooled)

	* NOTE(review): the workbook is written to the current working directory
	* and, with no firstrow option, without a header row - confirm intended.
	export excel using costs_byyear.xlsx, replace

restore







